gg: grammar of graphics. ggplot: data, aesthetic mappings

library(rnoaa)
## Warning: package 'rnoaa' was built under R version 3.3.2
# Pull 2017 daily precipitation and min/max temperature for three weather
# stations, attach a readable station name, and rescale the temperatures.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    # Translate each station id into a human-readable label.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # NOTE(review): presumably the raw values are tenths of a degree -- confirm.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the identifying columns first and keep all remaining columns.
  select(name, id, everything())

# Print the tibble to inspect the result.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # ... with 1,085 more rows

Start a plot

# Basic scatterplot of daily maximum against daily minimum temperature.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Color is mapped only in the point layer; the smooth layer has no color
# mapping, so a single curve is fitted over all stations together.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), alpha = 0.1) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

# Mapping color in ggplot() passes it on to every layer, so both the points
# and the smooth curves are drawn separately for each station.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

# Same plot as a row of panels: one facet column per station.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

## A more interesting plot

# Daily maximum temperature over the year, one point per day and station.
ggplot(data = weather_df, mapping = aes(x = date, y = tmax, color = name)) +
  geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).

# Same data drawn as one connected line per station instead of points.
ggplot(data = weather_df, mapping = aes(x = date, y = tmax, color = name)) +
  geom_line()

# Daily maximum temperature over time, one panel per station, with point size
# showing precipitation. `size` is mapped inside geom_point() rather than in
# ggplot() so that geom_smooth() does not inherit a continuous aesthetic it
# cannot use (NOTE(review): newer ggplot2 versions warn that such an aesthetic
# is dropped during the statistical transformation -- confirm for your version).
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point(aes(size = prcp)) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

## Warning: Removed 3 rows containing missing values (geom_point).

univariate

# Little quiz: `color = "blue"` is supplied outside aes(), so it is a fixed
# setting for the layer and every point is drawn in blue.
ggplot(data = weather_df) +
  geom_point(mapping = aes(x = tmax, y = tmin), color = "blue")
## Warning: Removed 15 rows containing missing values (geom_point).

# Quiz variant: inside aes(), "blue" is treated as a data value to be mapped,
# not as a color setting, so the points take their color from the default
# color scale rather than being drawn in blue.
ggplot(data = weather_df) +
  geom_point(mapping = aes(x = tmax, y = tmin, color = "blue"))
## Warning: Removed 15 rows containing missing values (geom_point).

# Histograms: `fill` colors the inside of the bars, whereas `color` would only
# set their outline. One panel per station.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_histogram() +
  facet_grid(. ~ name)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Density curves of tmax, filled by station, one panel per station.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_density() +
  facet_grid(. ~ name)
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Ridge plot: one tmax density curve per station, stacked along the y axis.
ploti <-
  ggplot(data = weather_df, mapping = aes(x = tmax, y = name)) +
  geom_density_ridges()

# Save the plot as a PDF, 8 wide by 5 high in ggsave()'s default units.
ggsave(filename = "density_ridges.pdf", plot = ploti, width = 8, height = 5)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

10/2 class viz 2.0

# Install patchwork from GitHub only when it is not already available, so that
# re-running these notes does not contact GitHub on every execution.
if (!requireNamespace("patchwork", quietly = TRUE)) {
  devtools::install_github("thomasp85/patchwork")
}
## Skipping install of 'patchwork' from a github remote, the SHA1 (fd7958ba) has not changed since last install.
##   Use `force = TRUE` to force installation
library(tidyverse)
library(ggridges)
library(patchwork)

First scatterplot

# Scatterplot demonstrating plot labels and axis scale options.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
  ) +
  # Tick marks and labels
  scale_x_continuous(
    breaks = c(-15, 0, 15),
    labels = c("-15º C", "0º C", "15º C"), # put units on the breaks
    limits = c(-20, 42) # fix the axis range when a specific scale matters
  ) +
  scale_y_continuous(
    position = "right", # draw the y axis on the right-hand side
    # The square root is undefined for negative tmax values, so those rows
    # become NaN and are dropped from the plot with a warning.
    trans = "sqrt"
  )
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (geom_point).

## colors and themes

# Scatterplot using the viridis discrete color scale, with the legend titled
# "Location" and placed below the plot.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(
    name = "Location", # legend title
    discrete = TRUE    # `name` is categorical, so use the discrete palette
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

#legend.position = "none" will remove the legend

Themes

# Same scatterplot with a complete theme applied.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE
  ) +
  # theme_bw() comes first: a complete theme replaces earlier theme() settings,
  # so the legend position is set after it.
  theme_bw() +
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

# or theme_classic() or theme_minimal()
# use the ggthemes package for more themes

Practice: Revisit the plot showing tmax against date for each location. Use labels, scale options, and theme changes to improve the readability of this plot.

# Practice plot: tmax against date for each station, with labels, a viridis
# color scale, a minimal theme, and the legend placed below the plot.
ggplot(weather_df, aes(date, tmax, color = name)) +
  geom_point() +
  labs(
    title = "Temperature vs date",
    x = "Date",
    y = "max temperature",
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(
    name = "location",
    discrete = TRUE
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Removed 3 rows containing missing values (geom_point).

Arguments to geom_*

# Keep only the rows for the Central Park station.
central_park_df <- weather_df %>%
  filter(name == "CentralPark_NY")

# Keep only the rows for the Waikiki station.
waikiki_df <- weather_df %>%
  filter(name == "Waikiki_HA")

# Waikiki is drawn as points; geom_line() is given its own `data`, so the line
# layer shows Central Park instead while reusing the same aesthetic mappings.
ggplot(data = waikiki_df, mapping = aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park_df) +
  theme(legend.position = "bottom")
## Warning: Removed 3 rows containing missing values (geom_point).

patchwork